library(readr)
library(ggplot2)
library(e1071)
library(readxl)
library(gridExtra)
library(tidyverse)
library(countrycode)
library(sf)
library(tmap)
library(lintr)
library(plotly)
library(MASS) # for boxcox normalisation
library(rmapshaper)
library(mapview)
library(leaflet)
library(RColorBrewer)
library(DT)
library(GGally)
library(dplyr)
library(psych)csc3007-cyan2
1 Importing of libraries
2 Selecting the indicators
#mortality_rate_attributed_to_unsafe_water
mortality_rate_unsafe_water <- read_csv("data/indicator_3.9.2.csv",show_col_types = FALSE)
#Proportion_of_population_using_safely_managed_drinking_water_services
proportion_of_safe_water<- read_csv("data/indicator_6.1.1.csv",show_col_types = FALSE)
#mortality_rate_unintentional_poisoning
mortality_rate_unintentional_poisoning <- read_csv("data/indicator_3.9.3.csv",show_col_types = FALSE)
#proportion-of-population-with-basic-handwashing-facilities-on-premises-by-urban-rural-percent
population_with_basic_handwashing_facilities<- read_csv("data/indicator_6.2.1.csv",show_col_types = FALSE)3 Map of Indicator 1
3.1 Select specific columns for mortality rate unintentional poisoning
mortality_rate_unintentional_poisoning <- dplyr::select(
mortality_rate_unintentional_poisoning,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
gender = 'sex_desc',
mortality_rate_unintentional_poisoning_2019 = 'value_2019'
)
# drop rows if contains at least one NA value
mortality_rate_unintentional_poisoning <- na.omit(mortality_rate_unintentional_poisoning)
#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))[1] FALSE
mortality_rate_unintentional_poisoning# A tibble: 597 × 6
code country region iso gender mortality_rate_unint…¹
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 470 Malta Southern Europe MLT Both … 0.1
2 152 Chile South America CHL Male 0.5
3 4 Afghanistan Southern Asia (excludi… AFG Both … 1
4 470 Malta Southern Europe MLT Female 0
5 156 China Eastern Asia CHN Both … 1.8
6 4 Afghanistan Southern Asia (excludi… AFG Both … 1
7 470 Malta Southern Europe MLT Male 0.2
8 156 China Eastern Asia CHN Female 1.5
9 478 Mauritania Western Africa MRT Both … 1.5
10 478 Mauritania Western Africa MRT Female 1.3
# ℹ 587 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
3.2 Histogram of mortality rate unintential poisoning for year 2019
# Create the histogram
mortality_histo <- ggplot(mortality_rate_unintentional_poisoning, aes(x=mortality_rate_unintentional_poisoning_2019)) +
geom_histogram(binwidth=0.1, fill='blue', color='black') +
labs(title="Mortality rate unintentional poisoning for Year 2019", x="Mortality Rate (per 100,000)", y="Number of Country")
mortality_histo
3.3 Select specific columns for population with basic handwashing facilities
population_with_basic_handwashing_facilities <- dplyr::select(
population_with_basic_handwashing_facilities,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
area = 'location_desc',
pop_with_basic_handwashing_facilites_2019 = 'value_2019'
)
# drop rows if contains at least one NA value
population_with_basic_handwashing_facilities <- na.omit(population_with_basic_handwashing_facilities)
#check for empty row
any(is.na(mortality_rate_unintentional_poisoning))[1] FALSE
population_with_basic_handwashing_facilities# A tibble: 284 × 6
code country region iso area pop_with_basic_handw…¹
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 4 Afghanistan Southern Asia (excludi… AFG All … 38
2 356 India Southern Asia IND Urban 82
3 4 Afghanistan Southern Asia (excludi… AFG All … 38
4 360 Indonesia South-Eastern Asia IDN All … 94
5 4 Afghanistan Southern Asia (excludi… AFG Rural 29
6 694 Sierra Leone Western Africa SLE Rural 18
7 360 Indonesia South-Eastern Asia IDN Rural 91
8 4 Afghanistan Southern Asia (excludi… AFG Rural 29
9 4 Afghanistan Southern Asia (excludi… AFG Urban 64
10 4 Afghanistan Southern Asia (excludi… AFG Urban 64
# ℹ 274 more rows
# ℹ abbreviated name: ¹pop_with_basic_handwashing_facilites_2019
3.4 Histogram of population with basic handwashing facilities for year 2019
# Create the histogram
pop_with_handwashing_facilities_histo <- ggplot(population_with_basic_handwashing_facilities, aes(x=pop_with_basic_handwashing_facilites_2019)) +
geom_histogram(binwidth=5, fill='blue', color='black') +
labs(title="Population with basic handwashing facilities for Year 2019", x="Population (%)", y="Number of Country")
pop_with_handwashing_facilities_histo
3.5 Left join between mortality rate and basic handwashing
countries_tb <- left_join(
mortality_rate_unintentional_poisoning,
population_with_basic_handwashing_facilities,
by = join_by(country, iso)
) |>
mutate(
iso = case_match(
iso,
"COD" ~ "ZAR",
"ROU" ~ "ROM",
"TLS" ~ "TMP",
"XKX" ~ "KSV",
.default = iso
)
)Warning in left_join(mortality_rate_unintentional_poisoning, population_with_basic_handwashing_facilities, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
countries_tb# A tibble: 1,323 × 10
code.x country region.x iso gender mortality_rate_unint…¹ code.y region.y
<dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
1 470 Malta Souther… MLT Both … 0.1 NA <NA>
2 152 Chile South A… CHL Male 0.5 NA <NA>
3 4 Afghanis… Souther… AFG Both … 1 4 Souther…
4 4 Afghanis… Souther… AFG Both … 1 4 Souther…
5 4 Afghanis… Souther… AFG Both … 1 4 Souther…
6 4 Afghanis… Souther… AFG Both … 1 4 Souther…
7 4 Afghanis… Souther… AFG Both … 1 4 Souther…
8 4 Afghanis… Souther… AFG Both … 1 4 Souther…
9 470 Malta Souther… MLT Female 0 NA <NA>
10 156 China Eastern… CHN Both … 1.8 NA <NA>
# ℹ 1,313 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: area <chr>,
# pop_with_basic_handwashing_facilites_2019 <dbl>
3.6 Import map
countries_sf <- read_sf("./data/WB_countries_Admin0.geojson")
land_sf <- read_sf("./data/WB_Land.geojson")countries_sf <-
countries_sf |>
ms_simplify() |> # Default argument `keep = 0.05`
st_make_valid()
land_sf <- ms_simplify(land_sf)
npts(countries_sf)[1] 30826
countries_sf <-
countries_sf |>
left_join(
countries_tb,
by = join_by(WB_A3 == iso)
) |>
dplyr::select(country, continent = 'CONTINENT', gender = 'gender', area = 'area', iso = WB_A3, mortality_rate_unintentional_poisoning_2019, pop_with_basic_handwashing_facilites_2019)
countries_sfSimple feature collection with 1246 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -180 ymin: -55.62754 xmax: 179.9038 ymax: 83.6341
Geodetic CRS: WGS 84
# A tibble: 1,246 × 8
country continent gender area iso mortality_rate_unintentional…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both sexes All areas IDN 0.3
2 Indonesia Asia Both sexes Rural IDN 0.3
3 Indonesia Asia Both sexes Urban IDN 0.3
4 Indonesia Asia Female All areas IDN 0.1
5 Indonesia Asia Female Rural IDN 0.1
6 Indonesia Asia Female Urban IDN 0.1
7 Indonesia Asia Male All areas IDN 0.5
8 Indonesia Asia Male Rural IDN 0.5
9 Indonesia Asia Male Urban IDN 0.5
10 Malaysia Asia Both sexes <NA> MYS 0.7
# ℹ 1,236 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
polygon <- st_polygon(x = list(rbind(
c(-180.0001, 90),
c(-179.9999, 90),
c(-179.9999, -90),
c(-180.0001, -90),
c(-180.0001, 90)
))) |>
st_sfc() |>
st_set_crs(4326) # Equirectangular projection
countries_sf <- mutate(
countries_sf,
geometry = st_difference(geometry, polygon)
) |>
st_make_valid()
tm_shape(countries_sf) + tm_polygons()
3.7 Map of population with basic handwashing facilities for year 2019
countries_sf$area <- replace_na(countries_sf$area, "missing")
choropleth_pop <-
tm_shape(land_sf, projection = "ESRI:54012") +
tm_polygons(col = "grey") +
tm_shape(countries_sf) +
tm_polygons(
col = "pop_with_basic_handwashing_facilites_2019",
border.col = "grey30",
palette = "Greens",
breaks = c(-Inf, 20, 40, 60, 80, 100, Inf),
colorNA = "grey",
title = "Population (%)",
lwd = 0.5
) +
tm_text("iso", size = "AREA") + # Use the new column to set the size
tm_credits(
c("", "Source: unstats-undesa.opendata.arcgis.com"),
position = c("right", "bottom")
) +
tm_layout(
bg.color = "lightblue",
frame = FALSE,
inner.margins = c(0.00, 0.2, 0.2, 0.1),
earth.boundary = TRUE,
space.color = "white",
main.title.size = 0.8,
title.size = 0.5,
main.title = "Population with basic handwashing facilities by Country for 2019"
)
choropleth_pop
3.8 Map of mortality rate unintentional poisoning for year 2019
countries_sf$gender <- replace_na(countries_sf$gender, "missing")
choropleth_mor<-
tm_shape(land_sf, projection = "ESRI:54012") +
tm_polygons(col = "grey") +
tm_shape(countries_sf) +
tm_polygons(
col = "mortality_rate_unintentional_poisoning_2019",
border.col = "grey30",
palette = "Oranges",
breaks = c(-Inf, 0.2, 0.4, 0.6, 0.8, 1, Inf),
colorNA = "grey",
title = "Mortality Rate \nper 100,000",
lwd = 0.5
) +
tm_text("iso", size = "AREA") +
tm_credits(
c("", "Source: unstats-undesa.opendata.arcgis.com"),
position = c("right", "bottom")
) +
tm_layout(
bg.color = "lightblue",
frame = FALSE,
inner.margins = c(0.00, 0.2, 0.2, 0.1),
earth.boundary = TRUE,
space.color = "white",
main.title.size = 0.8,
title.size = 0.5,
main.title = "Mortality Rate Unintentional Poisoning by Country for 2019"
)
choropleth_mor
3.9 Filtering of areas
countries_sf_all_areas <- countries_sf[countries_sf$area == "All areas",]
countries_sf_urban <- countries_sf[countries_sf$area == "Urban",]
countries_sf_rural <- countries_sf[countries_sf$area == "Rural",]
countries_sf_all_areasSimple feature collection with 324 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS: WGS 84
# A tibble: 324 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both … All … IDN 0.3
2 Indonesia Asia Female All … IDN 0.1
3 Indonesia Asia Male All … IDN 0.5
4 Bolivia (Plurinational S… South Am… Both … All … BOL 0.6
5 Bolivia (Plurinational S… South Am… Female All … BOL 0.5
6 Bolivia (Plurinational S… South Am… Male All … BOL 0.6
7 India Asia Both … All … IND 0.3
8 India Asia Female All … IND 0.2
9 India Asia Male All … IND 0.3
10 Ethiopia Africa Both … All … ETH 3.3
# ℹ 314 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
countries_sf_urbanSimple feature collection with 306 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS: WGS 84
# A tibble: 306 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both … Urban IDN 0.3
2 Indonesia Asia Female Urban IDN 0.1
3 Indonesia Asia Male Urban IDN 0.5
4 Bolivia (Plurinational S… South Am… Both … Urban BOL 0.6
5 Bolivia (Plurinational S… South Am… Female Urban BOL 0.5
6 Bolivia (Plurinational S… South Am… Male Urban BOL 0.6
7 India Asia Both … Urban IND 0.3
8 India Asia Female Urban IND 0.2
9 India Asia Male Urban IND 0.3
10 Ethiopia Africa Both … Urban ETH 3.3
# ℹ 296 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
countries_sf_ruralSimple feature collection with 309 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -157.449 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS: WGS 84
# A tibble: 309 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both … Rural IDN 0.3
2 Indonesia Asia Female Rural IDN 0.1
3 Indonesia Asia Male Rural IDN 0.5
4 Bolivia (Plurinational S… South Am… Both … Rural BOL 0.6
5 Bolivia (Plurinational S… South Am… Female Rural BOL 0.5
6 Bolivia (Plurinational S… South Am… Male Rural BOL 0.6
7 Peru South Am… Both … Rural PER 0.4
8 Peru South Am… Female Rural PER 0.3
9 Peru South Am… Male Rural PER 0.5
10 India Asia Both … Rural IND 0.3
# ℹ 299 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")
# Assign colors to the categories
colorpal_area <- colorFactor(colors,
levels = c("All areas", "Urban", "Rural"))3.10 Map of population with basic handwashing facilities for all areas
leaflet(countries_sf_all_areas) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_area(area),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso,
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
"<br>Area: ", area)) %>%
addLegend(pal = colorpal_area,
values = ~area,
title = "Area",
position = "bottomright")3.11 Map of population with basic handwashing facilities for urban areas
leaflet(countries_sf_urban) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_area(area), # use colorpal_area here
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso, # This adds labels that appear when you hover over a country
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
"<br>Area: ", area)) %>%
addLegend(pal = colorpal_area,
values = ~area,
title = "Area",
position = "bottomright")3.12 Map of population with basic handwashing facilities for rural areas
leaflet(countries_sf_rural) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_area(area), # use colorpal_area here
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso, # This adds labels that appear when you hover over a country
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Population with basic handwashing facilities (%): ", pop_with_basic_handwashing_facilites_2019,
"<br>Area: ", area)) %>%
addLegend(pal = colorpal_area,
values = ~area,
title = "Area",
position = "bottomright")3.13 Filtering of genders
countries_sf_male <- countries_sf[countries_sf$gender == "Male",]
countries_sf_female <- countries_sf[countries_sf$gender == "Female",]
countries_sf_both <- countries_sf[countries_sf$gender == "Both sexes",]
countries_sf_maleSimple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS: WGS 84
# A tibble: 410 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Male All … IDN 0.5
2 Indonesia Asia Male Rural IDN 0.5
3 Indonesia Asia Male Urban IDN 0.5
4 Malaysia Asia Male miss… MYS 1
5 Chile South Am… Male miss… CHL 0.5
6 Bolivia (Plurinational S… South Am… Male All … BOL 0.6
7 Bolivia (Plurinational S… South Am… Male Rural BOL 0.6
8 Bolivia (Plurinational S… South Am… Male Urban BOL 0.6
9 Peru South Am… Male Rural PER 0.5
10 Argentina South Am… Male miss… ARG 0.5
# ℹ 400 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
countries_sf_femaleSimple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS: WGS 84
# A tibble: 410 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Female All … IDN 0.1
2 Indonesia Asia Female Rural IDN 0.1
3 Indonesia Asia Female Urban IDN 0.1
4 Malaysia Asia Female miss… MYS 0.3
5 Chile South Am… Female miss… CHL 0.2
6 Bolivia (Plurinational S… South Am… Female All … BOL 0.5
7 Bolivia (Plurinational S… South Am… Female Rural BOL 0.5
8 Bolivia (Plurinational S… South Am… Female Urban BOL 0.5
9 Peru South Am… Female Rural PER 0.3
10 Argentina South Am… Female miss… ARG 0.3
# ℹ 400 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
countries_sf_bothSimple feature collection with 410 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.11652
Geodetic CRS: WGS 84
# A tibble: 410 × 8
country continent gender area iso mortality_rate_unint…¹
<chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both … All … IDN 0.3
2 Indonesia Asia Both … Rural IDN 0.3
3 Indonesia Asia Both … Urban IDN 0.3
4 Malaysia Asia Both … miss… MYS 0.7
5 Chile South Am… Both … miss… CHL 0.4
6 Bolivia (Plurinational S… South Am… Both … All … BOL 0.6
7 Bolivia (Plurinational S… South Am… Both … Rural BOL 0.6
8 Bolivia (Plurinational S… South Am… Both … Urban BOL 0.6
9 Peru South Am… Both … Rural PER 0.4
10 Argentina South Am… Both … miss… ARG 0.4
# ℹ 400 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
# Get a color palette with three colors
colors <- brewer.pal(3, "Set1")
# Assign colors to the categories
colorpal_gender <- colorFactor(colors,
levels = c("Female", "Male", "Both sexes"))3.14 Map of mortality rate unintentional poisoning for male gender
# Replace NAs in 'gender' column
countries_sf_male$gender <- replace_na(countries_sf_male$gender, "missing")
leaflet(countries_sf_male) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_gender(gender),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso,
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
"<br>Gender: ", gender)) %>%
addLegend(pal = colorpal_gender,
values = ~gender,
title = "Gender",
position = "bottomright")3.15 Map of mortality rate unintentional poisoning for female gender
# Replace NAs in 'gender' column
countries_sf_female$gender <- replace_na(countries_sf_female$gender, "missing")
leaflet(countries_sf_female) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_gender(gender),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso,
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
"<br>Gender: ", gender)) %>%
addLegend(pal = colorpal_gender,
values = ~gender,
title = "Gender",
position = "bottomright")3.16 Map of mortality rate unintentional poisoning for both gender
# Replace NAs in 'gender' column
countries_sf_both$gender <- replace_na(countries_sf_both$gender, "missing")
leaflet(countries_sf_both) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorpal_gender(gender), # Assign color based on gender
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~iso,
popup = ~paste("Country Code: ", iso,
"<br>Country: ", country,
"<br>Mortality Rate Unintentional Poisoning per 100,000: ", mortality_rate_unintentional_poisoning_2019,
"<br>Gender: ", gender)) %>%
addLegend(pal = colorpal_gender,
values = ~gender,
title = "Gender",
position = "bottomright")4 Plots of Indicator between mortality rate and population with basic handwashing facilities
countries_sf <- countries_sf[!is.na(countries_sf$mortality_rate_unintentional_poisoning_2019) & !is.na(countries_sf$pop_with_basic_handwashing_facilites_2019), ]
countries_sf <- countries_sf %>%
filter(gender == 'Both sexes')
countries_sfSimple feature collection with 313 features and 7 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -172.7497 ymin: -34.80869 xmax: 166.1479 ymax: 55.38515
Geodetic CRS: WGS 84
# A tibble: 313 × 8
country continent gender area iso mortality_rate_unint…¹
* <chr> <chr> <chr> <chr> <chr> <dbl>
1 Indonesia Asia Both … All … IDN 0.3
2 Indonesia Asia Both … Rural IDN 0.3
3 Indonesia Asia Both … Urban IDN 0.3
4 Bolivia (Plurinational S… South Am… Both … All … BOL 0.6
5 Bolivia (Plurinational S… South Am… Both … Rural BOL 0.6
6 Bolivia (Plurinational S… South Am… Both … Urban BOL 0.6
7 Peru South Am… Both … Rural PER 0.4
8 India Asia Both … Urban IND 0.3
9 India Asia Both … All … IND 0.3
10 India Asia Both … Rural IND 0.3
# ℹ 303 more rows
# ℹ abbreviated name: ¹mortality_rate_unintentional_poisoning_2019
# ℹ 2 more variables: pop_with_basic_handwashing_facilites_2019 <dbl>,
# geometry <MULTIPOLYGON [°]>
4.1 Correlation
# Plot the data
plot_loess <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, y = pop_with_basic_handwashing_facilites_2019)) +
geom_point(aes(color = area,
shape = area,
text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
geom_smooth(data = subset(countries_sf, area == "All areas"),
method = loess, se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
geom_smooth(data = subset(countries_sf, area == "Urban"),
method = loess, se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
geom_smooth(data = subset(countries_sf, area == "Rural"),
method = loess, se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
labs(x = "Mortality Rate (deaths per 100,000 population) for 2019 ",
y = "Population with Basic Handwashing Facilities for 2019 (%)",
title = "Correlation between Mortality Rate and Basic Handwashing Facilities \n for Both Gender",
color = "Location Type",
shape = "Location Type") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(shape = 16)),
shape = guide_legend(override.aes = list(color = "black")))Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
# Convert to a plotly plot
gp_map_1 <- ggplotly(plot_loess, tooltip = "text")`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_map_1correlation_all <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"],
countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "All areas"])
correlation_urban <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"],
countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Urban"])
correlation_rural <- cor(countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"],
countries_sf$mortality_rate_unintentional_poisoning_2019[countries_sf$area == "Rural"])
blank_plot <- ggplot() +
theme_void() +
theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
annotate("text",
x = 0.5,
y = 0.5,
label = paste("Correlation All:",
round(correlation_all, 2), "\n",
"Correlation Urban:",
round(correlation_urban, 2), "\n",
"Correlation Rural:",
round(correlation_rural, 2)),
size = 5,
color = "black",
hjust = 0.5)
# Convert to a plotly plot
gp_text_map_1 <- ggplotly(blank_plot)
gp_text_map_1stacked_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
geom_bar(position = "stack") +
labs(x = "Mortality Rate (deaths per 100,000 population)",
y = "Count",
title = "Stacked Bar Chart of Mortality Rates") +
theme_minimal()
# Convert to a plotly plot
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart)
# Create a grouped bar chart
grouped_bar_chart <- ggplot(countries_sf, aes(x = mortality_rate_unintentional_poisoning_2019, fill = area)) +
geom_bar(position = "dodge") +
labs(x = "Mortality Rate (deaths per 100,000 population)",
y = "Count",
title = "Grouped Bar Chart of Mortality Rates") +
theme_minimal()
# Convert to a plotly plot
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart)4.2 Barchart and stacked barchart (without Transformation)
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart, width = 1000, height = 600, autosize = TRUE)
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart, width = 1000, height = 600, autosize = TRUE)
ggplot_stacked_bar_chartggplot_grouped_bar_chart4.3 Normalise Data
#Ensure you have the MASS library installed
countries_sf$mortality_rate_unintentional_poisoning_2019.positive <- countries_sf$mortality_rate_unintentional_poisoning_2019 + abs(min(countries_sf$mortality_rate_unintentional_poisoning_2019)) + 0.1
# Apply the Box-Cox transformation
bc_result <- MASS::boxcox(countries_sf$mortality_rate_unintentional_poisoning_2019.positive ~ 1,
lambda = seq(-3,3,0.1))
# The optimal lambda value is the one that maximizes the log-likelihood
optimal_lambda <- bc_result$x[which.max(bc_result$y)]
# Transform the data using the optimal lambda value
if (optimal_lambda == 0) {
countries_sf$latest_value.x_bc <- log(countries_sf$mortality_rate_unintentional_poisoning_2019.positive)
} else {
countries_sf$latest_value.x_bc <- (countries_sf$mortality_rate_unintentional_poisoning_2019.positive^optimal_lambda - 1) / optimal_lambda
}
# Shift the transformed variable to be non-negative
min_value <- min(countries_sf$latest_value.x_bc)
countries_sf$latest_value.x_bc <- countries_sf$latest_value.x_bc + abs(min_value) + 0.14.4 Histogram (Normalised)
# Round the transformed mortality rates to the nearest whole number
countries_sf$latest_value.x_bc_rounded <- round(countries_sf$latest_value.x_bc)
# Convert the latest_value.x_bc_rounded variable into a categorical variable
countries_sf$latest_value.x_bc_cat <- cut(countries_sf$latest_value.x_bc_rounded, breaks = 50)
# Create a stacked bar chart
stacked_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
geom_bar(position = "stack", bins =20) +
labs(x = "Transformed Mortality Rate",
y = "Count",
title = "Stacked Bar Chart of Transformed Mortality Rates") +
theme_minimal()Warning in geom_bar(position = "stack", bins = 20): Ignoring unknown
parameters: `bins`
ggplot_stacked_bar_chart_bc <- ggplotly(stacked_bar_chart_bc)
ggplot_stacked_bar_chart_bc# Create a grouped bar chart
grouped_bar_chart_bc <- ggplot(countries_sf, aes(x = latest_value.x_bc_rounded, fill = area)) +
geom_bar(position = "dodge") +
labs(x = "Transformed Mortality Rate",
y = "Count",
title = "Grouped Bar Chart of Transformed Mortality Rates") +
theme_minimal()
# Convert to a plotly plot
ggplot_grouped_bar_chart_bc <- ggplotly(grouped_bar_chart_bc)
ggplot_grouped_bar_chart_bc4.5 Scatter plot Normalised
plot_loess <- ggplot(countries_sf, aes(x = latest_value.x_bc, y = pop_with_basic_handwashing_facilites_2019)) +
geom_point(aes(color = area,
shape = area,
text = paste("Country:", country, "<br>", "Location:", area)), alpha = 0.7) +
geom_smooth(data = subset(countries_sf, area == "All areas"),
method = loess, se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
geom_smooth(data = subset(countries_sf, area == "Urban"),
method = loess,
se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
geom_smooth(data = subset(countries_sf, area == "Rural"),
method = loess,
se = FALSE,
aes(color = area, weight = pop_with_basic_handwashing_facilites_2019),
show.legend = TRUE) +
labs(x = "Transformed Mortality Rate (deaths per 100,000 population) for 2019",
y = "Population with Basic Handwashing Facilities for 2019 (%)",
title = "Correlation between Mortality Rate and Basic Handwashing Facilities (Normalised) for Both Gender",
color = "Location Type",
shape = "Location Type") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(shape = 16)),
shape = guide_legend(override.aes = list(color = "black")))Warning in geom_point(aes(color = area, shape = area, text = paste("Country:",
: Ignoring unknown aesthetics: text
gp_normalised_map_1 <- ggplotly(plot_loess, tooltip = "text")`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_normalised_map_14.6 Correlation Normalised
correlation_all <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "All areas"],
countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "All areas"])
correlation_urban <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Urban"],
countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Urban"])
correlation_rural <- cor(countries_sf$latest_value.x_bc[countries_sf$area == "Rural"],
countries_sf$pop_with_basic_handwashing_facilites_2019[countries_sf$area == "Rural"])
blank_plot <- ggplot() +
theme_void() +
theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
annotate("text",
x = 0.5,
y = 0.5,
label = paste("Correlation All:",
round(correlation_all, 2), "\n",
"Correlation Urban:",
round(correlation_urban, 2), "\n",
"Correlation Rural:",
round(correlation_rural, 2)),
size = 5,
color = "black",
hjust = 0.5)
gp_text_normalised_map_1 <- ggplotly(blank_plot)
gp_text_normalised_map_14.7 Combining normal and unormalised plots
combined_plot_map_1 <- subplot(
gp_map_1, gp_text_map_1,
gp_normalised_map_1, gp_text_normalised_map_1,
nrows = 2, margin = 0.05
)
# Print the combined plot
combined_plot_map_15 Map of Indicator 2
6 Retrieve Data
6.1 Mortality Rate due to Unsafe Water
mortality_rate_unsafe_water_map <- dplyr::select(
mortality_rate_unsafe_water,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
mortality_rate_unsafe_water = 'latest_value'
)
mortality_rate_unsafe_water_map <- mortality_rate_unsafe_water_map%>%
distinct()
mortality_rate_unsafe_water_map# A tibble: 183 × 5
code country region iso mortality_rate_unsaf…¹
<dbl> <chr> <chr> <chr> <dbl>
1 583 Micronesia (Federated States of) Oceania … FSM 3.6
2 496 Mongolia Eastern … MNG 1.3
3 499 Montenegro Southern… MNE 0
4 504 Morocco Northern… MAR 1.9
5 508 Mozambique Eastern … MOZ 27.6
6 104 Myanmar South-Ea… MMR 12.6
7 516 Namibia Southern… NAM 18.3
8 524 Nepal Southern… NPL 19.8
9 528 Netherlands Western … NLD 0.2
10 554 New Zealand Australi… NZL 0.1
# ℹ 173 more rows
# ℹ abbreviated name: ¹mortality_rate_unsafe_water
6.2 Proportion of safe water management
proportion_of_safe_water_map <- dplyr::select(
proportion_of_safe_water,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
urbanisation = "location_desc",
value_2019 = "value_2019"
)
proportion_of_safe_water_map <- proportion_of_safe_water_map%>%
distinct()
proportion_of_safe_water_map# A tibble: 286 × 6
code country region iso urbanisation value_2019
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 807 North Macedonia South… MKD Rural 66
2 804 Ukraine Easte… UKR Urban 89
3 807 North Macedonia South… MKD Urban 85
4 826 United Kingdom of Great Britain a… North… GBR All areas 100
5 580 Northern Mariana Islands Ocean… MNP All areas 91
6 840 United States of America North… USA All areas 97
7 578 Norway North… NOR All areas 99
8 512 Oman Weste… OMN All areas 90
9 586 Pakistan South… PAK All areas 36
10 586 Pakistan South… PAK Rural 33
# ℹ 276 more rows
7 Cleaning and fixing geometry data
countries_geom <- read_sf("./data/WB_countries_Admin0.geojson")
countries_boundary <- st_boundary(countries_geom)
countries_sf_simplified <- ms_simplify(countries_boundary, keep = 0.05)
countries_sf_valid <- st_make_valid(countries_sf_simplified)
countries_wrapped <- st_wrap_dateline(countries_sf_valid)
polygon <- st_polygon(x = list(rbind(c(-180.0001, 90),
c(-179.9999, 90),
c(-179.9999, -90),
c(-180.0001, -90),
c(-180.0001, 90)))) |>
st_sfc() |>
st_set_crs(4326)
countries_wrapped <- countries_wrapped |>
st_difference(polygon)Warning: attribute variables are assumed to be spatially constant throughout
all geometries
land_sf <- read_sf("data/WB_Land.geojson")
land_boundary <- st_boundary(land_sf)
land_sf_simplified <- ms_simplify(land_boundary, keep = 0.05)
land_sf_valid <- st_make_valid(land_sf_simplified)
land_wrapped <- st_wrap_dateline(land_sf_valid)
polygon <- st_polygon(x = list(rbind(c(-180.0001, 90),
c(-179.9999, 90),
c(-179.9999, -90),
c(-180.0001, -90),
c(-180.0001, 90)))) |>
st_sfc() |>
st_set_crs(4326)
land_wrapped <- land_wrapped |>
st_difference(polygon)Warning: attribute variables are assumed to be spatially constant throughout
all geometries
countries_wrapped <- countries_wrapped |>
st_difference(polygon)Warning: attribute variables are assumed to be spatially constant throughout
all geometries
# Washing data combined
countries_sf_water <- left_join(
countries_wrapped,
proportion_of_safe_water_map,
by = c("WB_A3" = "iso")
)Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3 of `x` matches multiple rows in `y`.
ℹ Row 6 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
countries_sf_waterSimple feature collection with 325 features and 57 fields
Geometry type: MULTILINESTRING
Dimension: XY
Bounding box: xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.6341
Geodetic CRS: WGS 84
# A tibble: 325 × 58
REGION_WB ISO_N3 ISO_A3_EH NAME_AR ISO_A3 GDP_MD_EST NAME_VI WB_NAME ISO_A2
<chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
1 East Asia … 360 IDN إندوني… IDN 3028000 Indone… Indone… ID
2 East Asia … 458 MYS ماليزيا MYS 863000 Malays… Malays… MY
3 Latin Amer… 152 CHL تشيلي CHL 436100 Chile Chile CL
4 Latin Amer… 152 CHL تشيلي CHL 436100 Chile Chile CL
5 Latin Amer… 068 BOL بوليفيا BOL 78350 Bolivia Bolivia BO
6 Latin Amer… 604 PER بيرو PER 410400 Peru Peru PE
7 Latin Amer… 604 PER بيرو PER 410400 Peru Peru PE
8 Latin Amer… 604 PER بيرو PER 410400 Peru Peru PE
9 Latin Amer… 032 ARG الأرجن… ARG 879400 Argent… Argent… AR
10 Europe & C… 196 CYP قبرص CYP 29260 Cộng h… Cyprus CY
# ℹ 315 more rows
# ℹ 49 more variables: UN_A3 <chr>, FIPS_10_ <chr>, NAME_EN <chr>,
# NAME_EL <chr>, WB_RULES <chr>, OBJECTID <int>, NAME_DE <chr>,
# SUBREGION <chr>, featurecla <chr>, NAME_ZH <chr>, WB_A3 <chr>,
# LASTCENSUS <int>, TYPE <chr>, NAME_PT <chr>, NAME_BN <chr>,
# FORMAL_EN <chr>, POP_YEAR <int>, REGION_UN <chr>, LEVEL <int>,
# POP_RANK <int>, NAME_IT <chr>, FORMAL_FR <chr>, CONTINENT <chr>, …
# Mortality Rate Combined
countries_sf_mortality <- left_join(
countries_wrapped,
mortality_rate_unsafe_water_map,
by = c("WB_A3" = "iso")
)
countries_sf_mortalitySimple feature collection with 197 features and 56 fields
Geometry type: MULTILINESTRING
Dimension: XY
Bounding box: xmin: -179.9999 ymin: -55.62754 xmax: 179.9999 ymax: 83.6341
Geodetic CRS: WGS 84
# A tibble: 197 × 57
REGION_WB ISO_N3 ISO_A3_EH NAME_AR ISO_A3 GDP_MD_EST NAME_VI WB_NAME ISO_A2
<chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
1 East Asia … 360 IDN إندوني… IDN 3028000 Indone… Indone… ID
2 East Asia … 458 MYS ماليزيا MYS 863000 Malays… Malays… MY
3 Latin Amer… 152 CHL تشيلي CHL 436100 Chile Chile CL
4 Latin Amer… 068 BOL بوليفيا BOL 78350 Bolivia Bolivia BO
5 Latin Amer… 604 PER بيرو PER 410400 Peru Peru PE
6 Latin Amer… 032 ARG الأرجن… ARG 879400 Argent… Argent… AR
7 Europe & C… 196 CYP قبرص CYP 29260 Cộng h… Cyprus CY
8 South Asia 356 IND الهند IND 8721000 Ấn Độ India IN
9 East Asia … 156 CHN جمهوري… CHN 21140000 Cộng h… China CN
10 Middle Eas… 376 ISR إسرائيل ISR 297000 Israel Israel IL
# ℹ 187 more rows
# ℹ 48 more variables: UN_A3 <chr>, FIPS_10_ <chr>, NAME_EN <chr>,
# NAME_EL <chr>, WB_RULES <chr>, OBJECTID <int>, NAME_DE <chr>,
# SUBREGION <chr>, featurecla <chr>, NAME_ZH <chr>, WB_A3 <chr>,
# LASTCENSUS <int>, TYPE <chr>, NAME_PT <chr>, NAME_BN <chr>,
# FORMAL_EN <chr>, POP_YEAR <int>, REGION_UN <chr>, LEVEL <int>,
# POP_RANK <int>, NAME_IT <chr>, FORMAL_FR <chr>, CONTINENT <chr>, …
8 Creating the Maps
8.1 Get the different values for all areas, urban and rural in water management
countries_sf_water_all <- countries_sf_water %>% filter(!(urbanisation %in% c("Rural","Urban")))
countries_sf_water_urban <- countries_sf_water %>% filter(!(urbanisation %in% c("All area","Urban")))
countries_sf_water_rural <- countries_sf_water %>% filter(!(urbanisation %in% c("Urban","All areas")))8.1.1 all areas for water management
# Load the required libraries
library(leaflet)
# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))
# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)
# Create a Leaflet map
all_map <- leaflet(countries_sf_water_all) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~WB_A3,
popup = ~paste("Country Code: ", WB_A3,
"<br>Country: ", country,
"<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
addLegend(
position = "bottomright",
pal = colorNumeric(palette = colors, domain = c(0, 100)),
values = c(0, 20, 40, 60, 80, 100),
title = "Proportion (%)",
labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
opacity = 1
)
all_map8.1.2 Urban areas
# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))
# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)
# Create a Leaflet map
urban_map <- leaflet(countries_sf_water_urban) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~WB_A3,
popup = ~paste("Country Code: ", WB_A3,
"<br>Country: ", country,
"<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
addLegend(
position = "bottomright",
pal = colorNumeric(palette = colors, domain = c(0, 100)),
values = c(0, 20, 40, 60, 80, 100),
title = "Proportion (%)",
labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
opacity = 1
)
urban_map8.1.3 Rural
# Define the color palette function
colorPalette <- colorRampPalette(c("#FF6969", "#00A0D6"))
# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)
# Create a Leaflet map
rural_map <- leaflet(countries_sf_water_rural) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(value_2019),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~WB_A3,
popup = ~paste("Country Code: ", WB_A3,
"<br>Country: ", country,
"<br>Proportion of population access to safely managed drinking water service: ", value_2019))%>%
addLegend(
position = "bottomright",
pal = colorNumeric(palette = colors, domain = c(0, 100)),
values = c(0, 20, 40, 60, 80, 100),
title = "Proportion (%)",
labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
opacity = 1
)
rural_map8.2 Create map for mortality rate
# Define the color palette function
colorPalette <- colorRampPalette(c("#FFFFDF", "#FFEEAA", "#FFBB55", "#FF7700", "#FF4400"))
# Generate the color palette based on the number of breaks
numBreaks <- 6
colors <- colorPalette(numBreaks)
# Create a Leaflet map
mortality_mapping <- leaflet(countries_sf_mortality) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~colorNumeric(palette=colors, domain = c(0, 20, 40, 60, 80, 100))(mortality_rate_unsafe_water),
fillOpacity = 0.8,
weight = 2,
opacity = 1,
color = "grey",
dashArray = "3",
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = ~WB_A3,
popup = ~paste("Country Code: ", WB_A3,
"<br>Country: ", country,
"<br>Mortality Rate from Unsafe Sanitation and Water per 100,000: ", mortality_rate_unsafe_water))%>%
addLegend(
position = "bottomright",
pal = colorNumeric(palette = colors, domain = c(0, 100)),
values = c(0, 20, 40, 60, 80, 100),
title = "Proportion (100, 000)",
labels = c("0-20", "20-40", "40-60", "60-80", "80-100"),
opacity = 1
)Warning in colorNumeric(palette = colors, domain = c(0, 20, 40, 60, 80, : Some
values were outside the color scale and will be treated as NA
mortality_mapping# Plots of Indicator between mortality rate due to unsafe drinking water and proportion of population using safely managed drinking water services.merged_data <- merge(mortality_rate_unsafe_water, proportion_of_safe_water, by=c('geoAreaCode', 'geoAreaName'))
merged_data <- merged_data[!is.na(merged_data$value_2019), ] #remove NA values for 2019
# Display the first few rows of the merged dataframe
head(merged_data) geoAreaCode geoAreaName ObjectId.x goal_code.x goal_labelEN.x
1 100 Bulgaria 113 3 Goal 3
2 104 Myanmar 7 3 Goal 3
3 104 Myanmar 7 3 Goal 3
4 104 Myanmar 7 3 Goal 3
5 112 Belarus 102 3 Goal 3
6 116 Cambodia 117 3 Goal 3
goal_descEN.x target_code.x
1 Ensure healthy lives and promote well-being for all at all ages 3.9
2 Ensure healthy lives and promote well-being for all at all ages 3.9
3 Ensure healthy lives and promote well-being for all at all ages 3.9
4 Ensure healthy lives and promote well-being for all at all ages 3.9
5 Ensure healthy lives and promote well-being for all at all ages 3.9
6 Ensure healthy lives and promote well-being for all at all ages 3.9
target_descEN.x
1 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
2 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
3 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
4 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
5 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
6 By 2030, substantially reduce the number of deaths and illnesses from hazardous chemicals and air, water and soil pollution and contamination
indicator_code.x indicator_reference.x
1 C030902 3.9.2
2 C030902 3.9.2
3 C030902 3.9.2
4 C030902 3.9.2
5 C030902 3.9.2
6 C030902 3.9.2
indicator_descEN.x
1 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
2 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
3 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
4 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
5 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
6 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (exposure to unsafe Water, Sanitation and Hygiene for All (WASH) services)
series_release.x
1 2021.Q2.G.03
2 2021.Q2.G.03
3 2021.Q2.G.03
4 2021.Q2.G.03
5 2021.Q2.G.03
6 2021.Q2.G.03
series_tags.x
1 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
2 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
3 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
4 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
5 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
6 ['hygiene', 'health', 'mortality', 'death', 'water-related diseases', 'sanitation', 'water', 'drinking water']
series.x
1 SH_STA_WASH
2 SH_STA_WASH
3 SH_STA_WASH
4 SH_STA_WASH
5 SH_STA_WASH
6 SH_STA_WASH
seriesDescription.x
1 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
2 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
3 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
4 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
5 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
6 Mortality rate attributed to unsafe water, unsafe sanitation and lack of hygiene (deaths per 100,000 population)
level_.x parentCode.x parentName.x type.x X.x Y.x ISO3.x
1 5 151 Eastern Europe Country 25.23763 42.75731 BGR
2 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
3 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
4 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
5 5 151 Eastern Europe Country 28.04940 53.54193 BLR
6 4 35 South-Eastern Asia Country 104.92284 12.71164 KHM
UN_Member.x Country_Profile.x UnitMultiplier.x Units_code.x
1 1 1 NA PER_100000_POP
2 1 1 NA PER_100000_POP
3 1 1 NA PER_100000_POP
4 1 1 NA PER_100000_POP
5 1 1 NA PER_100000_POP
6 1 1 NA PER_100000_POP
Units_desc.x timeSeries_id.x timeSeries_keys.x n_years.x min_year.x
1 Per 100,000 population SH_STA_WASH NA 1 2016
2 Per 100,000 population SH_STA_WASH NA 1 2016
3 Per 100,000 population SH_STA_WASH NA 1 2016
4 Per 100,000 population SH_STA_WASH NA 1 2016
5 Per 100,000 population SH_STA_WASH NA 1 2016
6 Per 100,000 population SH_STA_WASH NA 1 2016
max_year.x years.x value_2016.x latest_value.x footnotes.x nature.x
1 2016 [2016] 0.1 0.1 NA E: Estimated data
2 2016 [2016] 12.6 12.6 NA E: Estimated data
3 2016 [2016] 12.6 12.6 NA E: Estimated data
4 2016 [2016] 12.6 12.6 NA E: Estimated data
5 2016 [2016] 0.1 0.1 NA E: Estimated data
6 2016 [2016] 6.5 6.5 NA E: Estimated data
ObjectId.y goal_code.y goal_labelEN.y
1 263 6 Goal 6
2 204 6 Goal 6
3 203 6 Goal 6
4 202 6 Goal 6
5 250 6 Goal 6
6 264 6 Goal 6
goal_descEN.y
1 Ensure availability and sustainable management of water and sanitation for all
2 Ensure availability and sustainable management of water and sanitation for all
3 Ensure availability and sustainable management of water and sanitation for all
4 Ensure availability and sustainable management of water and sanitation for all
5 Ensure availability and sustainable management of water and sanitation for all
6 Ensure availability and sustainable management of water and sanitation for all
target_code.y
1 6.1
2 6.1
3 6.1
4 6.1
5 6.1
6 6.1
target_descEN.y
1 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
2 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
3 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
4 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
5 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
6 By 2030, achieve universal and equitable access to safe and affordable drinking water for all
indicator_code.y indicator_reference.y
1 C060101 6.1.1
2 C060101 6.1.1
3 C060101 6.1.1
4 C060101 6.1.1
5 C060101 6.1.1
6 C060101 6.1.1
indicator_descEN.y
1 Proportion of population using safely managed drinking water services
2 Proportion of population using safely managed drinking water services
3 Proportion of population using safely managed drinking water services
4 Proportion of population using safely managed drinking water services
5 Proportion of population using safely managed drinking water services
6 Proportion of population using safely managed drinking water services
series_release.y series_tags.y series.y
1 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
2 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
3 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
4 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
5 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
6 2021.Q2.G.03 ['water', 'drinking water'] SH_H2O_SAFE
seriesDescription.y
1 Proportion of population using safely managed drinking water services, by urban/rural (%)
2 Proportion of population using safely managed drinking water services, by urban/rural (%)
3 Proportion of population using safely managed drinking water services, by urban/rural (%)
4 Proportion of population using safely managed drinking water services, by urban/rural (%)
5 Proportion of population using safely managed drinking water services, by urban/rural (%)
6 Proportion of population using safely managed drinking water services, by urban/rural (%)
level_.y parentCode.y parentName.y type.y X.y Y.y ISO3.y
1 5 151 Eastern Europe Country 25.23763 42.75731 BGR
2 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
3 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
4 4 35 South-Eastern Asia Country 96.51752 21.19333 MMR
5 5 151 Eastern Europe Country 28.04940 53.54193 BLR
6 4 35 South-Eastern Asia Country 104.92284 12.71164 KHM
UN_Member.y Country_Profile.y location_code location_desc UnitMultiplier.y
1 1 1 _T All areas NA
2 1 1 U Urban NA
3 1 1 R Rural NA
4 1 1 _T All areas NA
5 1 1 _T All areas NA
6 1 1 _T All areas NA
Units_code.y Units_desc.y timeSeries_id.y timeSeries_keys.y n_years.y
1 PT Percentage SH_H2O_SAFE___T location 21
2 PT Percentage SH_H2O_SAFE__U location 21
3 PT Percentage SH_H2O_SAFE__R location 21
4 PT Percentage SH_H2O_SAFE___T location 21
5 PT Percentage SH_H2O_SAFE___T location 21
6 PT Percentage SH_H2O_SAFE___T location 21
min_year.y max_year.y
1 2000 2020
2 2000 2020
3 2000 2020
4 2000 2020
5 2000 2020
6 2000 2020
years.y
1 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
2 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
3 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
4 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
5 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
6 [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
value_2000 value_2001 value_2002 value_2003 value_2004 value_2005 value_2006
1 90 90 91 91 92 92 93
2 51 52 54 56 58 59 61
3 18 20 21 23 24 26 27
4 27 29 30 32 33 35 37
5 81 81 81 81 83 84 85
6 17 17 18 18 19 19 20
value_2007 value_2008 value_2009 value_2010 value_2011 value_2012 value_2013
1 93 94 94 95 95 96 96
2 63 65 67 68 70 70 71
3 29 30 32 34 35 37 39
4 38 40 42 44 45 47 48
5 86 88 89 90 91 92 93
6 20 21 21 22 23 23 24
value_2014 value_2015 value_2016.y value_2017 value_2018 value_2019
1 97 97 97 97 97 98
2 71 71 72 72 73 73
3 41 43 45 47 49 51
4 50 51 53 54 56 58
5 94 94 95 95 95 95
6 24 25 25 26 27 27
value_2020 latest_value.y footnotes.y nature.y
1 98 98 NA E: Estimated data
2 74 74 NA E: Estimated data
3 52 52 NA E: Estimated data
4 59 59 NA E: Estimated data
5 95 95 NA E: Estimated data
6 28 28 NA E: Estimated data
8.3 Correlation
# Plot the data
plot_loess <- ggplot(merged_data, aes(x = latest_value.x, y = value_2019)) +
geom_point(aes(color = location_desc,
shape = location_desc,
text = paste("Country:", geoAreaName, "<br>", "Location:", location_desc)), alpha = 0.7) +
geom_smooth(data = subset(merged_data, location_desc == "All areas"),
method = loess, se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
geom_smooth(data = subset(merged_data, location_desc == "Urban"),
method = loess,
se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
geom_smooth(data = subset(merged_data, location_desc == "Rural"),
method = loess,
se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
labs(x = "Mortality Rate (deaths per 100,000 population)",
y = "Safely Managed Drinking Water Services (%)",
title = "Correlation between Mortality Rate and Water Services (2019)",
color = "Location Type",
shape = "Location Type") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(shape = 16)),
shape = guide_legend(override.aes = list(color = "black")))Warning in geom_point(aes(color = location_desc, shape = location_desc, :
Ignoring unknown aesthetics: text
# Convert to a plotly plot
gp <- ggplotly(plot_loess, tooltip = "text")`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
# Print the plot
gpcorrelation_all <- cor(merged_data$latest_value.x[merged_data$location_desc == "All areas"],
merged_data$value_2019[merged_data$location_desc == "All areas"])
correlation_urban <- cor(merged_data$latest_value.x[merged_data$location_desc == "Urban"],
merged_data$value_2019[merged_data$location_desc == "Urban"])
correlation_rural <- cor(merged_data$latest_value.x[merged_data$location_desc == "Rural"],
merged_data$value_2019[merged_data$location_desc == "Rural"])
blank_plot <- ggplot() +
theme_void() +
theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
annotate("text",
x = 0.5,
y = 0.5,
label = paste("Correlation All:",
round(correlation_all, 2), "\n",
"Correlation Urban:",
round(correlation_urban, 2), "\n",
"Correlation Rural:",
round(correlation_rural, 2)),
size = 5,
color = "black",
hjust = 0.5)
# Convert to a plotly plot
gp_text <- ggplotly(blank_plot)
gp_text8.4 Barchart and stacked barchart (without Transformation)
stacked_bar_chart <- ggplot(merged_data, aes(x = latest_value.x, fill = location_desc)) +
geom_histogram(position = "stack", bins = 20) +
labs(x = "Mortality Rate (deaths per 100,000 population)",
y = "Count",
title = "Stacked Bar Chart of Mortality Rates") +
theme_minimal()
#change to plotly
ggplot_stacked_bar_chart <- ggplotly(stacked_bar_chart)
ggplot_stacked_bar_chartgrouped_bar_chart <- ggplot(merged_data, aes(x = latest_value.x, fill = location_desc)) +
geom_histogram(position = "dodge", bins = 20) +
labs(x = "Mortality Rate (deaths per 100,000 population)",
y = "Count",
title = "Grouped Bar Chart of Mortality Rates") +
theme_minimal()
# Convert to a plotly plot
ggplot_grouped_bar_chart <- ggplotly(grouped_bar_chart)
ggplot_grouped_bar_chart# Filter data for the histogram
merged_data_all<- subset(merged_data, location_desc == "All areas") # change to rural or urban if you want
histogram <- ggplot(merged_data_all, aes(x = latest_value.x,
text = paste("Country:", geoAreaName,
"<br>Mortality Rate:", latest_value.x))) +
geom_histogram(bins = 50, fill = 'grey', color = 'black') +
labs(x = 'Mortality Rate (deaths per 100,000 population)',
y = 'Count',
title = 'Histogram of Mortality Rates for "All" locations') +
theme_minimal()
# Convert ggplot histogram to a plotly plot
plotly_histogram <- ggplotly(histogram, tooltip = "text")8.5 Normalise Data
merged_data$latest_value.x_positive <- merged_data$latest_value.x + abs(min(merged_data$latest_value.x)) + 0.1
# Apply the Box-Cox transformation
bc_result <- MASS::boxcox(merged_data$latest_value.x_positive ~ 1,
lambda = seq(-3,3,0.1))
# The optimal lambda value is the one that maximizes the log-likelihood
optimal_lambda <- bc_result$x[which.max(bc_result$y)]
# Transform the data using the optimal lambda value
if (optimal_lambda == 0) {
merged_data$latest_value.x_bc <- log(merged_data$latest_value.x_positive)
} else {
merged_data$latest_value.x_bc <- (merged_data$latest_value.x_positive^optimal_lambda - 1) / optimal_lambda
}
# Shift the transformed variable to be non-negative
min_value <- min(merged_data$latest_value.x_bc)
merged_data$latest_value.x_bc <- merged_data$latest_value.x_bc + abs(min_value) + 0.18.6 Histogram (Normalised)
# Round the transformed mortality rates to the nearest whole number
merged_data$latest_value.x_bc_rounded <- round(merged_data$latest_value.x_bc)
# Convert the latest_value.x_bc_rounded variable into a categorical variable
merged_data$latest_value.x_bc_cat <- cut(merged_data$latest_value.x_bc_rounded, breaks = 50)
# Create a stacked bar chart
stacked_bar_chart_bc <- ggplot(merged_data, aes(x = latest_value.x_bc_rounded, fill = location_desc)) +
geom_bar(position = "stack", bins =20) +
labs(x = "Transformed Mortality Rate",
y = "Count",
title = "Stacked Bar Chart of Transformed Mortality Rates") +
theme_minimal()Warning in geom_bar(position = "stack", bins = 20): Ignoring unknown
parameters: `bins`
ggplot_stacked_bar_chart_bc <- ggplotly(stacked_bar_chart_bc)
ggplot_stacked_bar_chart_bc# Create a grouped bar chart
grouped_bar_chart_bc <- ggplot(merged_data, aes(x = latest_value.x_bc_rounded, fill = location_desc)) +
geom_bar(position = "dodge") +
labs(x = "Transformed Mortality Rate",
y = "Count",
title = "Grouped Bar Chart of Transformed Mortality Rates") +
theme_minimal()
# Convert to a plotly plot
ggplot_grouped_bar_chart_bc <- ggplotly(grouped_bar_chart_bc)
ggplot_grouped_bar_chart_bc8.7 Scatter plot Normalised
plot_loess <- ggplot(merged_data, aes(x = latest_value.x_bc, y = value_2019)) +
geom_point(aes(color = location_desc,
shape = location_desc,
text = paste("Country:", geoAreaName, "<br>", "Location:", location_desc)), alpha = 0.7) +
geom_smooth(data = subset(merged_data, location_desc == "All areas"),
method = loess, se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
geom_smooth(data = subset(merged_data, location_desc == "Urban"),
method = loess,
se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
geom_smooth(data = subset(merged_data, location_desc == "Rural"),
method = loess,
se = FALSE,
aes(color = location_desc, weight = value_2019),
show.legend = TRUE) +
labs(x = "Transformed Mortality Rate (deaths per 100,000 population)",
y = "Safely Managed Drinking Water Services (%)",
title = "Correlation between Transformed Mortality Rate and Water Services (Normalised)",
color = "Location Type",
shape = "Location Type") +
theme_minimal() +
guides(color = guide_legend(override.aes = list(shape = 16)),
shape = guide_legend(override.aes = list(color = "black")))Warning in geom_point(aes(color = location_desc, shape = location_desc, :
Ignoring unknown aesthetics: text
gp_normalised <- ggplotly(plot_loess, tooltip = "text")`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
gp_normalised8.8 Correlation normalised
correlation_all <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "All areas"],
merged_data$value_2019[merged_data$location_desc == "All areas"])
correlation_urban <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "Urban"],
merged_data$value_2019[merged_data$location_desc == "Urban"])
correlation_rural <- cor(merged_data$latest_value.x_bc[merged_data$location_desc == "Rural"],
merged_data$value_2019[merged_data$location_desc == "Rural"])
blank_plot <- ggplot() +
theme_void() +
theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
annotate("text",
x = 0.5,
y = 0.5,
label = paste("Correlation All:",
round(correlation_all, 2), "\n",
"Correlation Urban:",
round(correlation_urban, 2), "\n",
"Correlation Rural:",
round(correlation_rural, 2)),
size = 5,
color = "black",
hjust = 0.5)
gp_text_normalised <- ggplotly(blank_plot)
gp_text_normalised8.9 Combining normal and unormalised plots
combined_plot <- subplot(
gp, gp_text,
gp_normalised, gp_text_normalised,
nrows = 2, margin = 0.05
)
# Print the combined plot
combined_plot9 Four Indicators
#mortality_rate_attributed_to_unsafe_water
mortality_rate_unsafe_water_indicators <- read_csv("data/indicator_3.9.2.csv",show_col_types = FALSE)
#Proportion_of_population_using_safely_managed_drinking_water_services
proportion_of_safe_water_indicators<- read_csv("data/indicator_6.1.1.csv",show_col_types = FALSE)
#mortality_rate_unintentional_poisoning
mortality_rate_unintentional_poisoning_indicators <- read_csv("data/indicator_3.9.3.csv",show_col_types = FALSE)
#proportion-of-population-with-basic-handwashing-facilities-on-premises-by-urban-rural-percent
population_with_basic_handwashing_facilities_indicators<- read_csv("data/indicator_6.2.1.csv",show_col_types = FALSE)mortality_rate_unsafe_water_indicators <- dplyr::select(
mortality_rate_unsafe_water_indicators,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
MR_unsafe_water = 'latest_value'
)
mortality_rate_unsafe_water_indicators <- mortality_rate_unsafe_water_indicators %>%
distinct()
mortality_rate_unsafe_water_indicators# A tibble: 183 × 5
code country region iso MR_unsafe_water
<dbl> <chr> <chr> <chr> <dbl>
1 583 Micronesia (Federated States of) Oceania (exc. A… FSM 3.6
2 496 Mongolia Eastern Asia MNG 1.3
3 499 Montenegro Southern Europe MNE 0
4 504 Morocco Northern Africa MAR 1.9
5 508 Mozambique Eastern Africa MOZ 27.6
6 104 Myanmar South-Eastern A… MMR 12.6
7 516 Namibia Southern Africa NAM 18.3
8 524 Nepal Southern Asia (… NPL 19.8
9 528 Netherlands Western Europe NLD 0.2
10 554 New Zealand Australia and N… NZL 0.1
# ℹ 173 more rows
proportion_of_safe_water_indicators <- dplyr::select(
proportion_of_safe_water_indicators,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
urbanisation = "location_desc",
sanitation_access = "value_2019",
)
# Remove duplicates
proportion_of_safe_water_indicators <- proportion_of_safe_water_indicators%>%
distinct()
proportion_of_safe_water_indicators# A tibble: 286 × 6
code country region iso urbanisation sanitation_access
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 807 North Macedonia South… MKD Rural 66
2 804 Ukraine Easte… UKR Urban 89
3 807 North Macedonia South… MKD Urban 85
4 826 United Kingdom of Great Br… North… GBR All areas 100
5 580 Northern Mariana Islands Ocean… MNP All areas 91
6 840 United States of America North… USA All areas 97
7 578 Norway North… NOR All areas 99
8 512 Oman Weste… OMN All areas 90
9 586 Pakistan South… PAK All areas 36
10 586 Pakistan South… PAK Rural 33
# ℹ 276 more rows
mortality_rate_unintentional_poisoning_indicators <- dplyr::select(
mortality_rate_unintentional_poisoning_indicators,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
gender = 'sex_desc',
MR_poisoning = 'value_2019'
)
# drop rows if contains at least one NA value
mortality_rate_unintentional_poisoning_indicators <- na.omit(mortality_rate_unintentional_poisoning_indicators)
# Remove duplicates
mortality_rate_unintentional_poisoning_indicators <- mortality_rate_unintentional_poisoning_indicators%>%
distinct()
#check for empty row
any(is.na(mortality_rate_unintentional_poisoning_indicators))[1] FALSE
mortality_rate_unintentional_poisoning_indicators# A tibble: 549 × 6
code country region iso gender MR_poisoning
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 470 Malta Southern Europe MLT Both se… 0.1
2 152 Chile South America CHL Male 0.5
3 4 Afghanistan Southern Asia (excluding India) AFG Both se… 1
4 470 Malta Southern Europe MLT Female 0
5 156 China Eastern Asia CHN Both se… 1.8
6 470 Malta Southern Europe MLT Male 0.2
7 156 China Eastern Asia CHN Female 1.5
8 478 Mauritania Western Africa MRT Both se… 1.5
9 478 Mauritania Western Africa MRT Female 1.3
10 478 Mauritania Western Africa MRT Male 1.7
# ℹ 539 more rows
population_with_basic_handwashing_facilities_indicators <- dplyr::select(
population_with_basic_handwashing_facilities_indicators,
code = 'geoAreaCode',
country = 'geoAreaName',
region = 'parentName',
iso = 'ISO3',
urbanisation = 'location_desc',
handwash_access = 'value_2019'
)
# drop rows if contains at least one NA value
population_with_basic_handwashing_facilities_indicators <- na.omit(population_with_basic_handwashing_facilities_indicators)
# Remove duplicates
population_with_basic_handwashing_facilities_indicators <- population_with_basic_handwashing_facilities_indicators%>%
distinct()
#check for empty row
any(is.na(mortality_rate_unintentional_poisoning_indicators))[1] FALSE
population_with_basic_handwashing_facilities_indicators# A tibble: 250 × 6
code country region iso urbanisation handwash_access
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 4 Afghanistan Southern Asia (excludi… AFG All areas 38
2 356 India Southern Asia IND Urban 82
3 360 Indonesia South-Eastern Asia IDN All areas 94
4 4 Afghanistan Southern Asia (excludi… AFG Rural 29
5 694 Sierra Leone Western Africa SLE Rural 18
6 360 Indonesia South-Eastern Asia IDN Rural 91
7 4 Afghanistan Southern Asia (excludi… AFG Urban 64
8 12 Algeria Northern Africa DZA All areas 85
9 12 Algeria Northern Africa DZA Rural 75
10 12 Algeria Northern Africa DZA Urban 88
# ℹ 240 more rows
9.1 Joining all 4 indicators
countries_tb <- left_join(
mortality_rate_unintentional_poisoning_indicators,
population_with_basic_handwashing_facilities_indicators,
by = join_by(country, iso)
)
countries_tb <- left_join(
countries_tb,
mortality_rate_unsafe_water_indicators,
by = join_by(iso)
)
countries_tb <- left_join(
countries_tb,
proportion_of_safe_water_indicators,
by = join_by(iso, urbanisation)
) |>
mutate(
iso = case_match(
iso,
"COD" ~ "ZAR",
"ROU" ~ "ROM",
"TLS" ~ "TMP",
"XKX" ~ "KSV",
.default = iso
)
)
names(countries_tb) [1] "code.x" "country.x" "region.x"
[4] "iso" "gender" "MR_poisoning"
[7] "code.y" "region.y" "urbanisation"
[10] "handwash_access" "code.x.x" "country.y"
[13] "region.x.x" "MR_unsafe_water" "code.y.y"
[16] "country" "region.y.y" "sanitation_access"
9.2 Joining with adminGeoJson
countries_sf <- read_sf("./data/WB_countries_Admin0.geojson")
countries_tb <- left_join(
countries_tb,
dplyr::select(countries_sf,WB_A3, GDP_MD_EST, INCOME_GRP, POP_EST),
by = c("iso" = "WB_A3")
)Warning in left_join(countries_tb, dplyr::select(countries_sf, WB_A3, GDP_MD_EST, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 325 of `x` matches multiple rows in `y`.
ℹ Row 126 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
countries_tb# A tibble: 1,038 × 22
code.x country.x region.x iso gender MR_poisoning code.y region.y
<dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
1 470 Malta Southern Europe MLT Both … 0.1 NA <NA>
2 152 Chile South America CHL Male 0.5 NA <NA>
3 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
4 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
5 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
6 470 Malta Southern Europe MLT Female 0 NA <NA>
7 156 China Eastern Asia CHN Both … 1.8 NA <NA>
8 470 Malta Southern Europe MLT Male 0.2 NA <NA>
9 156 China Eastern Asia CHN Female 1.5 NA <NA>
10 478 Mauritania Western Africa MRT Both … 1.5 478 Western…
# ℹ 1,028 more rows
# ℹ 14 more variables: urbanisation <chr>, handwash_access <dbl>,
# code.x.x <dbl>, country.y <chr>, region.x.x <chr>, MR_unsafe_water <dbl>,
# code.y.y <dbl>, country <chr>, region.y.y <chr>, sanitation_access <dbl>,
# GDP_MD_EST <dbl>, INCOME_GRP <chr>, POP_EST <int>,
# geometry <MULTIPOLYGON [°]>
9.3 Clean data
Remove certain columns and rename column for cleaner a look
# Drop
clean_datatable <- dplyr::select(countries_tb,
-code.y, -code.x,
-region.y, -region.x,
-code.x.x, code.y.y,
-country.x, -country.y,
-region.x.x, -region.y.y,
-iso)
# Rename
clean_datatable <- clean_datatable %>% rename(
gdp = GDP_MD_EST,
income_group = INCOME_GRP,
population = POP_EST)
# Rearrange
clean_datatable <- clean_datatable %>%
dplyr::select(country,
gdp,
income_group,
population,
urbanisation,
gender,
handwash_access,
sanitation_access,
MR_poisoning,
MR_unsafe_water,)
datatable(clean_datatable)9.4 Correlation between 4 indicators
# filter gender of both sexes to reduce duplicates
countries_tb <- countries_tb |>
filter(gender == "Both sexes") |>
na.omit(countries_tb)
countries_tb# A tibble: 143 × 22
code.x country.x region.x iso gender MR_poisoning code.y region.y
<dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
1 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
2 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
3 4 Afghanistan Southern Asia (… AFG Both … 1 4 Souther…
4 484 Mexico Central America MEX Both … 0.4 484 Central…
5 496 Mongolia Eastern Asia MNG Both … 2.8 496 Eastern…
6 496 Mongolia Eastern Asia MNG Both … 2.8 496 Eastern…
7 496 Mongolia Eastern Asia MNG Both … 2.8 496 Eastern…
8 499 Montenegro Southern Europe MNE Both … 0.6 499 Souther…
9 499 Montenegro Southern Europe MNE Both … 0.6 499 Souther…
10 104 Myanmar South-Eastern A… MMR Both … 1.3 104 South-E…
# ℹ 133 more rows
# ℹ 14 more variables: urbanisation <chr>, handwash_access <dbl>,
# code.x.x <dbl>, country.y <chr>, region.x.x <chr>, MR_unsafe_water <dbl>,
# code.y.y <dbl>, country <chr>, region.y.y <chr>, sanitation_access <dbl>,
# GDP_MD_EST <dbl>, INCOME_GRP <chr>, POP_EST <int>,
# geometry <MULTIPOLYGON [°]>
countries_tb_subset <- dplyr::select(countries_tb, MR_poisoning, MR_unsafe_water, handwash_access, sanitation_access,urbanisation,country.x)
# Display the subsetted data
print(countries_tb_subset)# A tibble: 143 × 6
MR_poisoning MR_unsafe_water handwash_access sanitation_access urbanisation
<dbl> <dbl> <dbl> <dbl> <chr>
1 1 13.9 38 27 All areas
2 1 13.9 29 24 Rural
3 1 13.9 64 36 Urban
4 0.4 1.1 90 43 All areas
5 2.8 1.3 84 30 All areas
6 2.8 1.3 77 11 Rural
7 2.8 1.3 88 38 Urban
8 0.6 0 99 85 All areas
9 0.6 0 99 87 Urban
10 1.3 12.6 74 58 All areas
# ℹ 133 more rows
# ℹ 1 more variable: country.x <chr>
9.5 Scatterplot matrix
#set axis names
countries_tb_subset_corr4 <- dplyr::select(countries_tb_subset, MR_unsafe_water, MR_poisoning, handwash_access, sanitation_access, urbanisation,country.x)
colnames(countries_tb_subset_corr4) <- c("Unsafe Water Mortality Rate",
"Poisoning Mortality",
"Handwashing Access (%)",
"Sanitation Access (%)",
"Urbanisation",
"Countries"
)
countries_tb_subset_corr4# A tibble: 143 × 6
`Unsafe Water Mortality Rate` `Poisoning Mortality` `Handwashing Access (%)`
<dbl> <dbl> <dbl>
1 13.9 1 38
2 13.9 1 29
3 13.9 1 64
4 1.1 0.4 90
5 1.3 2.8 84
6 1.3 2.8 77
7 1.3 2.8 88
8 0 0.6 99
9 0 0.6 99
10 12.6 1.3 74
# ℹ 133 more rows
# ℹ 3 more variables: `Sanitation Access (%)` <dbl>, Urbanisation <chr>,
# Countries <chr>
#change to factor for colour
countries_tb_subset_corr4$Urbanisation <- as.factor(countries_tb_subset_corr4$Urbanisation)
plot <- ggpairs(data = countries_tb_subset_corr4,
columns = 1:4,
upper = list(continuous = "cor"),
lower = list(continuous = "smooth", se=FALSE),
diag = list(continuous = "bar", bin=30),
mapping = aes(color = Urbanisation),
tooltips = c("Countries")) +
theme_minimal() +
theme(axis.title = element_text(size = 5))Warning in warn_if_args_exist(list(...)): Extra arguments: "tooltips" are being
ignored. If these are meant to be aesthetics, submit them using the 'mapping'
variable within ggpairs with ggplot2::aes or ggplot2::aes_string.
Warning in check_and_set_ggpairs_defaults("diag", diag, continuous =
"densityDiag", : Changing diag$continuous from 'bar' to 'barDiag'
plot`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Convert ggplot to plotly
plot_interactive <- ggplotly(plot)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Can only have one: highlight
plot_interactive <- layout(plot_interactive,
font = list(size = 8)
)
# Render the plotly scatterplot matrix
plot_interactive9.6 Different Scatter plot matrix
countries_tb_subset_corr4 <- dplyr::select(countries_tb_subset, MR_unsafe_water, MR_poisoning, handwash_access, sanitation_access, urbanisation)
# Rename the columns
colnames(countries_tb_subset_corr4) <- c("Unsafe Water Mortality Rate",
"Poisoning Mortality",
"Handwashing Access (%)",
"Sanitation Access (%)",
"Urbanisation")
# Make urbanisation as a factor for colourisation
countries_tb_subset_corr4$Urbanisation <- as.factor(countries_tb_subset_corr4$Urbanisation)
# Map urbanisation to colors
color_mapping <- c("All areas" = "red", "Urban" = "blue", "Rural" = "green") # Adjust to your actual factor levels and desired colors
colors <- color_mapping[countries_tb_subset_corr4$Urbanisation]
# Create scatterplot matrix without colors
plot <- pairs.panels(countries_tb_subset_corr4[, 1:4],
hist.col = "#00AFBB",
bg = "transparent")